Load Libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidymodels)
## Registered S3 method overwritten by 'tune':
##   method                   from   
##   required_pkgs.model_spec parsnip
## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.4 ──
## ✓ broom        0.7.9      ✓ rsample      0.1.0 
## ✓ dials        0.0.10     ✓ tune         0.1.6 
## ✓ infer        1.0.0      ✓ workflows    0.2.3 
## ✓ modeldata    0.1.1      ✓ workflowsets 0.1.0 
## ✓ parsnip      0.1.7      ✓ yardstick    0.0.8 
## ✓ recipes      0.1.17
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(skimr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(vip)        
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
library(fastshap)   
## 
## Attaching package: 'fastshap'
## The following object is masked from 'package:vip':
## 
##     gen_friedman
## The following object is masked from 'package:dplyr':
## 
##     explain
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(ISLR)
library(tree)
## Registered S3 method overwritten by 'tree':
##   method     from
##   print.tree cli
library(ggplot2)
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(imputeTS)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:yardstick':
## 
##     accuracy
library(urca)
library(pracma)
## 
## Attaching package: 'pracma'
## The following object is masked from 'package:purrr':
## 
##     cross
library(astsa)
## 
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
## 
##     gas
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ fma       2.4     ✓ expsmooth 2.3
## ── Conflicts ───────────────────────────────────────────────── fpp2_conflicts ──
## x forecast::accuracy() masks yardstick::accuracy()
## x pracma::cross()      masks purrr::cross()
## x scales::discard()    masks purrr::discard()
## 
## Attaching package: 'fpp2'
## The following object is masked from 'package:astsa':
## 
##     oil

Import data and Summarise data

wind <- read_csv("Turbine_Data.csv") %>%
  clean_names()
## Rows: 118224 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (7): Year, Month, Day, ActivePower, AmbientTemperature, WindDirection, ...
## dttm (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
wind
wind_daily <- wind %>%
  group_by(year, month,day) %>%
  summarize(sum_active_power= sum(active_power,na.rm=TRUE),
            ambient_termperature=mean(ambient_temperature,na.rm=TRUE),
            wind_direction=mean(wind_direction,na.rm=TRUE),
            wind_speed=mean(wind_speed,na.rm=TRUE))%>%
  filter(year!=2017)
## `summarise()` has grouped output by 'year', 'month'. You can override using the `.groups` argument.

Create a time series object

# active_power
aseries<-subset(wind_daily, select=c(sum_active_power))
aseries<-na_if(aseries,0)
ap_ts <- ts(aseries, start=c(2018,1), frequency =365)
plot(ap_ts)

ap_tsi <- na_interpolation(ap_ts)
plot(ap_tsi)

ggAcf(ap_tsi)

ggPacf(ap_tsi)

# ambient_termperature and wind_speed
bseries<-subset(wind_daily, select=c(ambient_termperature))
bp_ts <- ts(bseries, start=c(2018,1), frequency =365)
bp_tsi <- na_interpolation(bp_ts)
plot(bp_tsi)

cseries<-subset(wind_daily, select=c(wind_speed))
cp_ts <- ts(cseries, start=c(2018,1), frequency =365)
cp_tsi <- na_interpolation(cp_ts)
plot(cp_tsi)

explore variables

ggplot(wind_daily, aes(x=ambient_termperature, y=sum_active_power)) + geom_point() 
## Warning: Removed 73 rows containing missing values (geom_point).

ggplot(wind_daily, aes(x=wind_direction, y=sum_active_power)) + geom_point() 
## Warning: Removed 70 rows containing missing values (geom_point).

ggplot(wind_daily, aes(x=wind_speed, y=sum_active_power)) + geom_point() 
## Warning: Removed 70 rows containing missing values (geom_point).

Deal with X

wind_daily%>%
  mutate(ambient_termperature=if_else(is.na(ambient_termperature),mean(ambient_termperature,na.rm=TRUE),ambient_termperature),
         wind_direction=if_else(is.na(wind_direction),mean(wind_direction,na.rm=TRUE),wind_direction),
         wind_speed=if_else(is.na(wind_speed),mean(wind_speed,na.rm=TRUE),wind_speed))->wind_daily

Ljung-Box test

Box.test(wind_daily$sum_active_power, lag=8, fitdf=0, type="Lj")
## 
##  Box-Ljung test
## 
## data:  wind_daily$sum_active_power
## X-squared = 2074.7, df = 8, p-value < 2.2e-16

ADF test

wind_df <- ur.df(ap_tsi, type = "drift")
summary(wind_df)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -148851  -18468   -1451   18043  172553 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.129e+04  1.924e+03   5.868 6.41e-09 ***
## z.lag.1     -1.446e-01  1.958e-02  -7.387 3.72e-13 ***
## z.diff.lag  -8.034e-02  3.491e-02  -2.302   0.0216 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33860 on 815 degrees of freedom
## Multiple R-squared:  0.08457,    Adjusted R-squared:  0.08233 
## F-statistic: 37.65 on 2 and 815 DF,  p-value: 2.299e-16
## 
## 
## Value of test-statistic is: -7.387 27.2866 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.43 -2.86 -2.57
## phi1  6.43  4.59  3.78

Model 1 - Auto Arima without intervention

Arima (1,0,0)(0,1,0)[365] RMSE:32256.62 MAPE:1118.073 Ljung box for Residuals: 1.443e-12

fit_auto <- auto.arima(ap_tsi)
summary(fit_auto)
## Series: ap_tsi 
## ARIMA(1,0,0)(0,1,0)[365] with drift 
## 
## Coefficients:
##          ar1    drift
##       0.6540  24.6560
## s.e.  0.0353  16.0104
## 
## sigma^2 estimated as 1.883e+09:  log likelihood=-5503.47
## AIC=11012.93   AICc=11012.99   BIC=11025.3
## 
## Training set error measures:
##                    ME     RMSE   MAE      MPE     MAPE      MASE      ACF1
## Training set 5.147842 32256.62 17573 166.8708 1118.073 0.4247732 0.0158162
checkresiduals(fit_auto)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0)(0,1,0)[365] with drift
## Q* = 321.13, df = 162, p-value = 1.443e-12
## 
## Model df: 2.   Total lags used: 164
accuracy(fit_auto)
##                    ME     RMSE   MAE      MPE     MAPE      MASE      ACF1
## Training set 5.147842 32256.62 17573 166.8708 1118.073 0.4247732 0.0158162
forecast(fit_auto, h=5)
##           Point Forecast      Lo 80    Hi 80     Lo 95    Hi 95
## 2020.2466       71543.28  15925.601 127161.0 -13516.62 156603.2
## 2020.2493       46509.03 -19947.988 112966.0 -55128.21 148146.3
## 2020.2521       76027.03   5439.817 146614.2 -31926.80 183980.9
## 2020.2548       52986.83 -19295.095 125268.8 -57558.84 163532.5
## 2020.2575       39975.67 -33019.181 112970.5 -71660.32 151611.7
fit_auto %>% forecast(h=5) %>% autoplot()

Model 2 - Auto Arima with intervention

Arima (1,1,1) RMSE:24091.68 MAPE:243.1563 Ljung box for Residuals: 3.109e-05

# mutate into data matrix
xr<-data.matrix(wind_daily[,5:7])
xr1<-data.matrix(wind_daily[,c(5,7)])
# impute new data
new<-read_csv("new.csv") %>%
  clean_names()
## New names:
## * `` -> ...1
## Rows: 11 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ...1
## dbl (3): Ambient Temperature, Wind Direction, Wind Speed
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
new<-data.matrix(new[,2:4])


fit_auto1 <- auto.arima(ap_tsi,xreg=xr1)
summary(fit_auto1)
## Series: ap_tsi 
## Regression with ARIMA(1,1,1) errors 
## 
## Coefficients:
##          ar1      ma1  ambient_termperature  wind_speed
##       0.4594  -0.9705             2470.5365  25303.9827
## s.e.  0.0367   0.0182              650.9531    906.0044
## 
## sigma^2 estimated as 5.84e+08:  log likelihood=-9426.96
## AIC=18863.92   AICc=18864   BIC=18887.46
## 
## Training set error measures:
##                    ME     RMSE     MAE       MPE     MAPE      MASE      ACF1
## Training set 177.2176 24091.68 15269.8 -41.25591 243.1563 0.3691003 0.0135854
checkresiduals(fit_auto1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,1) errors
## Q* = 241.85, df = 160, p-value = 3.109e-05
## 
## Model df: 4.   Total lags used: 164
accuracy(fit_auto1)
##                    ME     RMSE     MAE       MPE     MAPE      MASE      ACF1
## Training set 177.2176 24091.68 15269.8 -41.25591 243.1563 0.3691003 0.0135854
forecast(fit_auto1,xreg=new[,c(1,3)], h=5)
## Warning in forecast.forecast_ARIMA(fit_auto1, xreg = new[, c(1, 3)], h = 5):
## xreg contains different column names from the xreg used in training. Please
## check that the regressors are in the same order.
## Warning in forecast.forecast_ARIMA(fit_auto1, xreg = new[, c(1, 3)], h = 5):
## Upper prediction intervals are not finite.
##           Point Forecast    Lo 80     Hi 80     Lo 95    Hi 95
## 2020.2466      102561.92 71592.63 133531.21 55198.480 149925.4
## 2020.2493       57785.39 23313.30  92257.47  5064.885 110505.9
## 2020.2521       50547.30 15188.59  85906.02 -3529.188 104623.8
## 2020.2548       75528.69 39881.22 111176.16 21010.595 130046.8
## 2020.2575       64156.39 28383.46  99929.31  9446.417 118866.4
## 2020.2603             NA       NA        NA        NA       NA
## 2020.2630             NA       NA        NA        NA       NA
## 2020.2658             NA       NA        NA        NA       NA
## 2020.2685             NA       NA        NA        NA       NA
## 2020.2712             NA       NA        NA        NA       NA
## 2020.2740             NA       NA        NA        NA       NA
fit_auto1 %>% forecast(xreg=new[,c(1,3)]) %>% autoplot()
## Warning in forecast.forecast_ARIMA(., xreg = new[, c(1, 3)]): xreg contains
## different column names from the xreg used in training. Please check that the
## regressors are in the same order.
## Warning in forecast.forecast_ARIMA(., xreg = new[, c(1, 3)]): Upper prediction
## intervals are not finite.

sfit_auto <- sarima(ap_tsi, 1, 1, 1, xreg=wind_daily[,c(5,7)])
## initial  value 10.226272 
## iter   2 value 10.198774
## iter   3 value 10.187815
## iter   4 value 10.180489
## iter   5 value 10.161963
## iter   6 value 10.146674
## iter   7 value 10.126460
## iter   8 value 10.116383
## iter   9 value 10.113957
## iter  10 value 10.093545
## iter  11 value 10.092075
## iter  12 value 10.091829
## iter  13 value 10.091815
## iter  14 value 10.091813
## iter  15 value 10.091812
## iter  16 value 10.091811
## iter  17 value 10.091810
## iter  17 value 10.091810
## iter  17 value 10.091810
## final  value 10.091810 
## converged
## initial  value 10.091431 
## iter   2 value 10.091419
## iter   3 value 10.091399
## iter   4 value 10.091396
## iter   5 value 10.091395
## iter   6 value 10.091394
## iter   7 value 10.091393
## iter   7 value 10.091393
## iter   7 value 10.091393
## final  value 10.091393 
## converged

summary(sfit_auto)
##                    Length Class  Mode   
## fit                14     Arima  list   
## degrees_of_freedom  1     -none- numeric
## ttable             16     -none- numeric
## AIC                 1     -none- numeric
## AICc                1     -none- numeric
## BIC                 1     -none- numeric
sfit_auto
## $fit
## 
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, 
##     Q), period = S), xreg = xreg, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##     REPORT = 1, reltol = tol))
## 
## Coefficients:
##          ar1      ma1  ambient_termperature  wind_speed
##       0.4594  -0.9705             2470.5365  25303.9827
## s.e.  0.0367   0.0182              650.9531    906.0044
## 
## sigma^2 estimated as 581117604:  log likelihood = -9426.96,  aic = 18863.92
## 
## $degrees_of_freedom
## [1] 815
## 
## $ttable
##                        Estimate       SE  t.value p.value
## ar1                      0.4594   0.0367  12.5023   0e+00
## ma1                     -0.9705   0.0182 -53.3230   0e+00
## ambient_termperature  2470.5365 650.9531   3.7953   2e-04
## wind_speed           25303.9827 906.0044  27.9292   0e+00
## 
## $AIC
## [1] 23.03287
## 
## $AICc
## [1] 23.03293
## 
## $BIC
## [1] 23.06162

Model 3 (1,0,0) with intervention & without seasonality

RMSE:24229.98 MAPE:325.1435 Ljung box for Residuals: 9.507e-07

sfit2 <- sarima(ap_tsi, 1, 0, 0, xreg=wind_daily[,c(5,7)])
## initial  value 10.236048 
## iter   2 value 10.100844
## iter   3 value 10.100305
## iter   4 value 10.096008
## iter   5 value 10.095933
## iter   6 value 10.095927
## iter   7 value 10.095927
## iter   8 value 10.095926
## iter   8 value 10.095926
## iter   8 value 10.095926
## final  value 10.095926 
## converged
## initial  value 10.095530 
## iter   2 value 10.095530
## iter   2 value 10.095530
## iter   2 value 10.095530
## final  value 10.095530 
## converged

summary(sfit2)
##                    Length Class  Mode   
## fit                14     Arima  list   
## degrees_of_freedom  1     -none- numeric
## ttable             16     -none- numeric
## AIC                 1     -none- numeric
## AICc                1     -none- numeric
## BIC                 1     -none- numeric
sfit2
## $fit
## 
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, 
##     Q), period = S), xreg = xreg, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##     REPORT = 1, reltol = tol))
## 
## Coefficients:
##          ar1   intercept  ambient_termperature  wind_speed
##       0.5102  -133235.53              2081.465   25494.192
## s.e.  0.0321    14602.27               471.824     712.723
## 
## sigma^2 estimated as 587091850:  log likelihood = -9441.86,  aic = 18893.73
## 
## $degrees_of_freedom
## [1] 816
## 
## $ttable
##                          Estimate         SE t.value p.value
## ar1                        0.5102     0.0321 15.9088       0
## intercept            -133235.5307 14602.2717 -9.1243       0
## ambient_termperature    2081.4650   471.8240  4.4115       0
## wind_speed             25494.1917   712.7230 35.7701       0
## 
## $AIC
## [1] 23.04113
## 
## $AICc
## [1] 23.04119
## 
## $BIC
## [1] 23.06985
fit2 <- Arima(ap_tsi, xreg=xr1,order=c(1,0,0))
summary(fit2)
## Series: ap_tsi 
## Regression with ARIMA(1,0,0) errors 
## 
## Coefficients:
##          ar1   intercept  ambient_termperature  wind_speed
##       0.5102  -133235.53              2081.465   25494.192
## s.e.  0.0321    14602.27               471.824     712.723
## 
## sigma^2 estimated as 5.9e+08:  log likelihood=-9441.86
## AIC=18893.73   AICc=18893.8   BIC=18917.27
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 5.485814 24229.98 15192.02 -115.4045 325.1435 0.3672202
##                      ACF1
## Training set -0.002748034
checkresiduals(fit2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,0,0) errors
## Q* = 260.09, df = 160, p-value = 9.507e-07
## 
## Model df: 4.   Total lags used: 164
ts.plot(ap_tsi,fitted(fit2), gpars=list(col=c("black","red")))

forecast(fit2, h=5, xreg=new[,c(1,3)])
## Warning in forecast.forecast_ARIMA(fit2, h = 5, xreg = new[, c(1, 3)]): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
## Warning in forecast.forecast_ARIMA(fit2, h = 5, xreg = new[, c(1, 3)]): Upper
## prediction intervals are not finite.
##           Point Forecast     Lo 80     Hi 80      Lo 95     Hi 95
## 2020.2466       96785.70 65657.718 127913.68  49179.560 144391.84
## 2020.2493       48977.54 14031.700  83923.38  -4467.510 102422.59
## 2020.2521       40618.42  4745.222  76491.61 -14244.901  95481.74
## 2020.2548       65498.37 29387.648 101609.09  10271.787 120724.95
## 2020.2575       53354.25 17181.947  89526.55  -1966.514 108675.01
## 2020.2603             NA        NA        NA         NA        NA
## 2020.2630             NA        NA        NA         NA        NA
## 2020.2658             NA        NA        NA         NA        NA
## 2020.2685             NA        NA        NA         NA        NA
## 2020.2712             NA        NA        NA         NA        NA
## 2020.2740             NA        NA        NA         NA        NA
autoplot(forecast(fit2, xreg=new[,c(1,3)], h=5))
## Warning in forecast.forecast_ARIMA(fit2, xreg = new[, c(1, 3)], h = 5): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
## Warning in forecast.forecast_ARIMA(fit2, xreg = new[, c(1, 3)], h = 5): Upper
## prediction intervals are not finite.

Model 4 (3,1,0) with intervention & without seasonality

RMSE:25582.9 MAPE:346.2267 Ljung box for Residuals: 1.20e-10

sfit4 <- sarima(ap_tsi, 3,1,0, xreg=wind_daily[,c(5,7)])
## initial  value 10.227488 
## iter   2 value 10.167577
## iter   3 value 10.154906
## iter   4 value 10.153711
## iter   5 value 10.153015
## iter   6 value 10.152325
## iter   7 value 10.152204
## iter   8 value 10.152167
## iter   9 value 10.152137
## iter  10 value 10.152110
## iter  11 value 10.152101
## iter  12 value 10.152100
## iter  12 value 10.152100
## final  value 10.152100 
## converged
## initial  value 10.150466 
## iter   2 value 10.150466
## iter   2 value 10.150466
## iter   2 value 10.150466
## final  value 10.150466 
## converged

summary(sfit4)
##                    Length Class  Mode   
## fit                14     Arima  list   
## degrees_of_freedom  1     -none- numeric
## ttable             20     -none- numeric
## AIC                 1     -none- numeric
## AICc                1     -none- numeric
## BIC                 1     -none- numeric
sfit4
## $fit
## 
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, 
##     Q), period = S), xreg = xreg, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##     REPORT = 1, reltol = tol))
## 
## Coefficients:
##           ar1      ar2      ar3  ambient_termperature  wind_speed
##       -0.3425  -0.3029  -0.1829             3134.7100  24360.1819
## s.e.   0.0356   0.0351   0.0344              722.5729    971.0248
## 
## sigma^2 estimated as 655284114:  log likelihood = -9475.34,  aic = 18962.68
## 
## $degrees_of_freedom
## [1] 814
## 
## $ttable
##                        Estimate       SE t.value p.value
## ar1                     -0.3425   0.0356 -9.6080       0
## ar2                     -0.3029   0.0351 -8.6349       0
## ar3                     -0.1829   0.0344 -5.3146       0
## ambient_termperature  3134.7100 722.5729  4.3383       0
## wind_speed           24360.1819 971.0248 25.0871       0
## 
## $AIC
## [1] 23.15346
## 
## $AICc
## [1] 23.15355
## 
## $BIC
## [1] 23.18795
fit4 <- Arima(ap_tsi, xreg=xr1,order=c(3,1,0))
summary(fit4)
## Series: ap_tsi 
## Regression with ARIMA(3,1,0) errors 
## 
## Coefficients:
##           ar1      ar2      ar3  ambient_termperature  wind_speed
##       -0.3425  -0.3029  -0.1829             3134.7100  24360.1819
## s.e.   0.0356   0.0351   0.0344              722.5729    971.0248
## 
## sigma^2 estimated as 659309228:  log likelihood=-9475.34
## AIC=18962.68   AICc=18962.79   BIC=18990.93
## 
## Training set error measures:
##                    ME    RMSE      MAE       MPE     MAPE     MASE        ACF1
## Training set 16.11635 25582.9 16270.99 -186.8391 346.2267 0.393301 -0.03229528
checkresiduals(fit4)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(3,1,0) errors
## Q* = 299.18, df = 159, p-value = 1.197e-10
## 
## Model df: 5.   Total lags used: 164
ts.plot(ap_tsi,fitted(fit4), gpars=list(col=c("black","red")))

forecast(fit4, h=5, xreg=new[,c(1,3)])
## Warning in forecast.forecast_ARIMA(fit4, h = 5, xreg = new[, c(1, 3)]): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
## Warning in forecast.forecast_ARIMA(fit4, h = 5, xreg = new[, c(1, 3)]): Upper
## prediction intervals are not finite.
##           Point Forecast    Lo 80    Hi 80      Lo 95    Hi 95
## 2020.2466      108107.14 75200.71 141013.6 57781.1059 158433.2
## 2020.2493       65994.17 26611.33 105377.0  5763.3109 126225.0
## 2020.2521       60149.97 17816.09 102483.9 -4594.1117 124894.1
## 2020.2548       84546.07 39628.55 129463.6 15850.6439 153241.5
## 2020.2575       74173.07 25362.36 122983.8  -476.4683 148822.6
## 2020.2603             NA       NA       NA         NA       NA
## 2020.2630             NA       NA       NA         NA       NA
## 2020.2658             NA       NA       NA         NA       NA
## 2020.2685             NA       NA       NA         NA       NA
## 2020.2712             NA       NA       NA         NA       NA
## 2020.2740             NA       NA       NA         NA       NA
autoplot(forecast(fit4, xreg=new[,c(1,3)], h=5))
## Warning in forecast.forecast_ARIMA(fit4, xreg = new[, c(1, 3)], h = 5): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
## Warning in forecast.forecast_ARIMA(fit4, xreg = new[, c(1, 3)], h = 5): Upper
## prediction intervals are not finite.

Model 5 (1,1,1) with intervention & seasonlity

RMSE:24243.55 MAPE:722.6617 Ljung box for Residuals: < 2.2e-16

sfit5 <- sarima(ap_tsi, 1, 1, 1, P=0,D=1,Q=0,S=182,xreg=wind_daily[,c(5,7)])
## initial  value 10.657882 
## iter   2 value 10.629598
## iter   3 value 10.618046
## iter   4 value 10.608885
## iter   5 value 10.590327
## iter   6 value 10.558984
## iter   7 value 10.554853
## iter   8 value 10.553174
## iter   9 value 10.550527
## iter  10 value 10.546856
## iter  11 value 10.545222
## iter  12 value 10.541784
## iter  13 value 10.528716
## iter  14 value 10.527094
## iter  15 value 10.525448
## iter  16 value 10.525370
## iter  17 value 10.525356
## iter  18 value 10.525351
## iter  18 value 10.525351
## iter  18 value 10.525351
## final  value 10.525351 
## converged
## initial  value 10.523862 
## iter   2 value 10.521715
## iter   3 value 10.521168
## iter   4 value 10.520286
## iter   5 value 10.520003
## iter   6 value 10.519782
## iter   7 value 10.519745
## iter   8 value 10.519731
## iter   9 value 10.519725
## iter  10 value 10.519714
## iter  11 value 10.519708
## iter  12 value 10.519695
## iter  13 value 10.519687
## iter  14 value 10.519687
## iter  14 value 10.519687
## iter  14 value 10.519687
## final  value 10.519687 
## converged

summary(sfit5)
##                    Length Class  Mode   
## fit                14     Arima  list   
## degrees_of_freedom  1     -none- numeric
## ttable             16     -none- numeric
## AIC                 1     -none- numeric
## AICc                1     -none- numeric
## BIC                 1     -none- numeric
sfit5
## $fit
## 
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, 
##     Q), period = S), xreg = xreg, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##     REPORT = 1, reltol = tol))
## 
## Coefficients:
##          ar1      ma1  ambient_termperature  wind_speed
##       0.4729  -0.9999             2419.6731  25526.4817
## s.e.  0.0359   0.0150              501.7418    767.1671
## 
## sigma^2 estimated as 1.344e+09:  log likelihood = -7604.9,  aic = 15219.81
## 
## $degrees_of_freedom
## [1] 633
## 
## $ttable
##                        Estimate       SE  t.value p.value
## ar1                      0.4729   0.0359  13.1858       0
## ma1                     -0.9999   0.0150 -66.7373       0
## ambient_termperature  2419.6731 501.7418   4.8225       0
## wind_speed           25526.4817 767.1671  33.2737       0
## 
## $AIC
## [1] 23.89295
## 
## $AICc
## [1] 23.89305
## 
## $BIC
## [1] 23.92793
fit5 <- Arima(ap_tsi, xreg=xr1,order=c(1,1,1),seasonal=list(order=c(0, 1, 0), period=365))

summary(fit5)
## Series: ap_tsi 
## Regression with ARIMA(1,1,1)(0,1,0)[365] errors 
## 
## Coefficients:
##          ar1     ma1  ambient_termperature  wind_speed
##       0.4860  -1.000             3741.7191   23135.069
## s.e.  0.0417   0.009              933.0387    1186.655
## 
## sigma^2 estimated as 1.071e+09:  log likelihood=-5367.91
## AIC=10745.81   AICc=10745.94   BIC=10766.4
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE       ACF1
## Training set 679.8727 24243.55 11889.35 -201.992 722.6617 0.2873883 0.02783075
checkresiduals(fit5)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,1)(0,1,0)[365] errors
## Q* = 371.94, df = 160, p-value < 2.2e-16
## 
## Model df: 4.   Total lags used: 164
ts.plot(ap_tsi,fitted(fit5), gpars=list(col=c("black","red")))

forecast(fit5, h=5, xreg=new[,c(1,3)])
## Warning in forecast.forecast_ARIMA(fit5, h = 5, xreg = new[, c(1, 3)]): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
## Warning in forecast.forecast_ARIMA(fit5, h = 5, xreg = new[, c(1, 3)]): Upper
## prediction intervals are not finite.
##           Point Forecast     Lo 80     Hi 80     Lo 95    Hi 95
## 2020.2466      100418.88 58433.315 142404.45  36207.50 164630.3
## 2020.2493       45562.77 -1159.146  92284.70 -25892.24 117017.8
## 2020.2521       48866.56  1075.291  96657.82 -24223.88 121957.0
## 2020.2548       56784.80  8735.204 104834.39 -16700.71 130270.3
## 2020.2575       49815.68  1700.806  97930.56 -23769.67 123401.0
## 2020.2603             NA        NA        NA        NA       NA
## 2020.2630             NA        NA        NA        NA       NA
## 2020.2658             NA        NA        NA        NA       NA
## 2020.2685             NA        NA        NA        NA       NA
## 2020.2712             NA        NA        NA        NA       NA
## 2020.2740             NA        NA        NA        NA       NA
autoplot(forecast(fit5, xreg=new[,c(1,3)], h=5))
## Warning in forecast.forecast_ARIMA(fit5, xreg = new[, c(1, 3)], h = 5): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
## Warning in forecast.forecast_ARIMA(fit5, xreg = new[, c(1, 3)], h = 5): Upper
## prediction intervals are not finite.

Model 6 Exponential Smoothing - Holt Winter

ap_ets <- ets(ap_tsi, model="ZNZ")
## Warning in ets(ap_tsi, model = "ZNZ"): I can't handle data with frequency
## greater than 24. Seasonality will be ignored. Try stlf() if you need seasonal
## forecasts.
summary(ap_ets)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = ap_tsi, model = "ZNZ") 
## 
##   Smoothing parameters:
##     alpha = 0.747 
## 
##   Initial states:
##     l = 47002.343 
## 
##   sigma:  34679.53
## 
##      AIC     AICc      BIC 
## 22650.03 22650.06 22664.16 
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE       ACF1
## Training set 97.39945 34637.21 24563.88 347.5753 549.3757 0.5937561 0.05094701
checkresiduals(ap_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 256.46, df = 162, p-value = 3.208e-06
## 
## Model df: 2.   Total lags used: 164
forecast(ap_tsi, h=5)
##           Point Forecast    Lo 80     Hi 80      Lo 95    Hi 95
## 2020.2466       92433.85 60910.49 123957.21  44223.031 140644.7
## 2020.2493       56833.29 17172.59  96493.99  -3822.513 117489.1
## 2020.2521       75965.13 29573.11 122357.15   5014.663 146915.6
## 2020.2548       81062.22 28798.77 133325.66   1132.169 160992.3
## 2020.2575       70229.54 12690.71 127768.37 -17768.502 158227.6
autoplot(forecast(ap_ets, h=5))